#!/usr/bin/perl -w

#
# Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
#
# This file is part of Bowtie 2.
#
# Bowtie 2 is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Bowtie 2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
#

#
# Generate lookup table that, given a packed DNA byte (four bases) and
# a character (A, C, G or T), returns how many times that character
# occurs in that packed byte.  Useful for quickly counting character
# occurrences in long strings.  The LUT is indexed first by character
# (0-3) then by byte (0-255).
#
# Larger lookup tables are also possible, though they seem
# counterproductive.  E.g., looking up eight bases at a time yields a
# 256K LUT, which doesn't fit in L1.  A four-base LUT is 1KB, easily
# fitting in L1.
#
# See ebwt.h. 
#

my @as4 = (), @as3 = (), @as2 = (), @as1 = ();
my @cs4 = (), @cs3 = (), @cs2 = (), @cs1 = ();
my @gs4 = (), @gs3 = (), @gs2 = (), @gs1 = ();
my @ts4 = (), @ts3 = (), @ts2 = (), @ts1 = ();

# Compile character arrays
my $i;
for($i = 0; $i < 256; $i++) {
	my $b01 = ($i >> 0) & 3;
	my $b23 = ($i >> 2) & 3;
	my $b45 = ($i >> 4) & 3;
	my $b67 = ($i >> 6) & 3;
	
	my $a4 = ($b01 == 0) + ($b23 == 0) + ($b45 == 0) + ($b67 == 0);
	my $c4 = ($b01 == 1) + ($b23 == 1) + ($b45 == 1) + ($b67 == 1);
	my $g4 = ($b01 == 2) + ($b23 == 2) + ($b45 == 2) + ($b67 == 2);
	my $t4 = ($b01 == 3) + ($b23 == 3) + ($b45 == 3) + ($b67 == 3);

	push @as4, $a4;
	push @cs4, $c4;
	push @gs4, $g4;
	push @ts4, $t4;

	my $a3 = ($b01 == 0) + ($b23 == 0) + ($b45 == 0);
	my $c3 = ($b01 == 1) + ($b23 == 1) + ($b45 == 1);
	my $g3 = ($b01 == 2) + ($b23 == 2) + ($b45 == 2);
	my $t3 = ($b01 == 3) + ($b23 == 3) + ($b45 == 3);

	push @as3, $a3;
	push @cs3, $c3;
	push @gs3, $g3;
	push @ts3, $t3;

	my $a2 = ($b01 == 0) + ($b23 == 0);
	my $c2 = ($b01 == 1) + ($b23 == 1);
	my $g2 = ($b01 == 2) + ($b23 == 2);
	my $t2 = ($b01 == 3) + ($b23 == 3);

	push @as2, $a2;
	push @cs2, $c2;
	push @gs2, $g2;
	push @ts2, $t2;

	my $a1 = ($b01 == 0) + 0;
	my $c1 = ($b01 == 1) + 0;
	my $g1 = ($b01 == 2) + 0;
	my $t1 = ($b01 == 3) + 0;

	push @as1, $a1;
	push @cs1, $c1;
	push @gs1, $g1;
	push @ts1, $t1;
}

my $entsPerLine = 16;

print "#include <stdint.h>\n\n";
print "/* Generated by gen_lookup_tables.pl */\n\n";

# Count occurrences in all 4 bit pairs 

print "uint8_t cCntLUT_4[4][4][256] = {\n";
print "\t/* All 4 bit pairs */ {\n";

# Print As array
print "\t\t/* As */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$as4[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t},\n";

# Print Cs array
print "\t\t/* Cs */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$cs4[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t},\n";

# Print Gs array
print "\t\t/* Gs */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$gs4[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t},\n";

# Print Ts array
print "\t\t/* Ts */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$ts4[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t}\n\t},\n";

# Count occurrences in low 1 bit pair

print "\t/* Least significant 1 bit pair */ {\n";

# Print As array
print "\t\t/* As */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$as1[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t},\n";

# Print Cs array
print "\t\t/* Cs */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$cs1[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t},\n";

# Print Gs array
print "\t\t/* Gs */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$gs1[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t},\n";

# Print Ts array
print "\t\t/* Ts */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$ts1[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t}\n\t},\n";

# Count occurrences in low 2 bit pairs

print "\t/* Least significant 2 bit pairs */ {\n";

# Print As array
print "\t\t/* As */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$as2[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t},\n";

# Print Cs array
print "\t\t/* Cs */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$cs2[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t},\n";

# Print Gs array
print "\t\t/* Gs */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$gs2[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t},\n";

# Print Ts array
print "\t\t/* Ts */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$ts2[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t}\n\t},\n";

# Count occurrences in low 3 bit pairs

print "\t/* Least significant 3 bit pairs */ {\n";

# Print As array
print "\t\t/* As */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$as3[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t},\n";

# Print Cs array
print "\t\t/* Cs */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$cs3[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t},\n";

# Print Gs array
print "\t\t/* Gs */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$gs3[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t},\n";

# Print Ts array
print "\t\t/* Ts */ {\n";
for($i = 0; $i < 256; $i++) {
	print "\t\t\t" if(($i % $entsPerLine) == 0);
	print "$ts3[$i], ";
	print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
}
print "\t\t}\n\t}\n";

print "};\n";
